rm(list=ls())

#Install these packages as needed
# install.packages(plyr)
# install.packages('ggmap')
# install.packages("mapproj")
# install.packages('coefplot')
# install.packages('AER')
# install.packages("cowplot")
# install.packages('gmapsdistance')
# install.packages("googleway")
# install.packages('plotrix')
# install.packages('questionr')
# install.packages('lattice')
# install.packages('stargazer')
# install.packages('randomizr')
# install.packages('ri')
# install.packages('aods3')
# install.packages('lmtest')
# install.packages('matrixStats')
# install.packages('quantreg')
# install.packages('dplyr')
# install.packages('readr')
# install.packages('rms')
# install.packages('multiwayvcov')
# install.packages('gridExtra')
# install.packages(sandwich)
# install.packages(Matrix)


library("plyr")
library('ggmap')
library('coefplot')
library('AER')
library("cowplot")
library("googleway")
library('plotrix')
library('questionr')
library('lattice')
library('stargazer')
library('randomizr')
library('ri')
library('aods3')
library('lmtest')
library('matrixStats')
library('quantreg')
library('dplyr')
library('readr')
library('rms')
library('multiwayvcov')
library('gridExtra')
library(sandwich)
library(Matrix)

set.seed(1234567)


#set your working directory
#setwd("")


data=read.csv('hhdata.csv')

ind=read.csv('inddata.csv')



#functions for the analysis

#functions for balance tests

#calculate balance on hh variables
bal_=function(x){
  
  data$x=with(data,eval(parse(text=x)))
  c= data$x[data$t==0]
  lm=lm(x~t+t*centeredB,data=data)
  vcov=vcov(lm,type='HC2')
  coef=coeftest(lm, vcov)
  bal.=as.vector(c(coef[1,1],coef[2,1],coef[2,2],coef[2,4]))%>% round(.,digits=3)
  return(bal.)
}

#add labels to balance

mean.data_=function(x){
  covs=as.data.frame(x)
  mc=(apply(covs,1,FUN=bal_))%>%t()%>%as.data.frame()
  mc=cbind.data.frame(covs,mc)
  colnames(mc)=c('Variable','Control ','Treatment ','sd','Pr(>|t|)')
  return(mc)}

#calculate balance on individual variables


bal_2=function(x){
  
  ind$x=with(ind,eval(parse(text=x)))
  c= ind$x[ind$t==0]
  lm=lm(x~t+t*centeredB,data=ind)
  vcov=cluster.vcov(lm,cluster=ind$X)
  coef=coeftest(lm, vcov)
  bal.=as.vector(c(coef[1,1],coef[2,1],coef[2,2],coef[2,4]))%>% round(.,digits=3)
  return(bal.)
}

#add labels to balance


mean.data_2=function(x){
  covs=as.data.frame(x)
  mc=(apply(covs,1,FUN=bal_2))%>%t()%>%as.data.frame()
  mc=cbind.data.frame(covs,mc)
  colnames(mc)=c('Variable','Control ','Treatment ','sd','Pr(>|t|)')
  return(mc)}


#functions for hh level treatment effects


#calculate effects
effect_hh=function(y){
  data$y=y
  c= data$y[data$t==0]
  lm=lm(y~t+t*centeredB, data=data)
  vcov=vcov(lm,type='HC2')
  coef=coeftest(lm, vcov)
  bal.=as.vector(c(coef[1,1],coef[2,1],coef[2,2],coef[2,4]))%>% round(.,digits=3)
  return(bal.)
}


#add labels
mean.data_hh=function(x){
  covs=as.data.frame(x)
  
  mc=apply(covs,2,FUN=effect_hh)%>%t()%>%as.data.frame()
  mc=cbind.data.frame(colnames(covs),mc)
  colnames(mc)=c('Variable','Control ','Treatment ','sd','Pr(>|t|)')
  mc$`Pr(>|t|)`=p.adjust(mc$`Pr(>|t|)`,method="BH")
  return(mc)}



#functions for individual level treatment effects

#calculate effects
effect_i=function(y){
  ind$y=y
  c= ind$y[ind$t==0]
  lm=lm(y~t+t*centeredB, data=ind)
  vcov=cluster.vcov(lm,cluster=ind$X)
  coef=coeftest(lm, vcov)
  bal.=as.vector(c(mean(c,na.rm=TRUE),coef[2,1],coef[2,2],coef[2,4]))%>% round(.,digits=3)
  return(bal.)
}

#add labels
mean.data_i=function(x){
  covs=as.data.frame(x)
  
  mc=apply(covs,2,FUN=effect_i)%>%t()%>%as.data.frame()
  mc=cbind.data.frame(colnames(covs),mc)
  colnames(mc)=c('Variable','Control ','Treatment ','sd','Pr(>|t|)')
  mc$`Pr(>|t|)`=p.adjust(mc$`Pr(>|t|)`,method="BH")
  return(mc)}





#Center the block dummies function
center_apply <- function(x) {
  apply(x, 2, function(y) y - mean(y))
}
# apply it to the hh and individual level data
data$centeredB=model.matrix( ~ as.factor(data$block) - 1) %>% center_apply

ind$centeredB=model.matrix( ~ as.factor(ind$block) - 1) %>% center_apply




# Table 1.  From the author's notes, no code required ---------------------


# Table 2: Balance tests on household and individual characteristics as measured through a survey.------------------



#Panel A, Household Characteristics


with(data,mean.data_(c('OBC','SCST','maratha','Muslim','kutchafloor', 'always',
                      'kutcharoof','same','date')))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")


#Panel B, Individual characteristics




with(ind,mean.data_2(c('member1age','female1','OBC','SCST','maratha','Muslim','kutchafloor',
                        'kutcharoof','always','same')))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")



# Table 3: Reduced form treatment effects.  --------


#A: Housing quality

mean.data_hh(with(data,cbind.data.frame(
  floor,roof,tap,toilet)))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")

#B: Assets and monthly income


mean.data_hh(with(data,cbind.data.frame(
  inc1,inc2,inc3,inc4,inc5,inc6,inc7,n_assets)))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")


#C: Resources for emergency expenditures



mean.data_hh(with(data,
                  cbind.data.frame( savings, fff, lender, bank,
                                    resourcesidk
                  )))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")


#D: HH-level education and employment


mean.data_hh(with(data,
                  cbind.data.frame(pschoolsons,pschooldau,engsons,engdaught,
                                   tuitions,tuitiond,salaried, govtw, formal)))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")

#E: Individual-level education and employment




mean.data_i(with(ind,cbind.data.frame(educ1,work1,workfull1,workpart1)))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")

#F: Future-looking attitudes 



mean.data_hh(with(data,
                  cbind.data.frame(happyfinances,childrenlives,neverleave,unsureleave)))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")



#G: Individualistic attitudes

mean.data_hh(with(data,
                  cbind.data.frame(trust,success,owndec
                  )))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")


#H: Ward level neighborhood characteristics

mean.data_hh(with(data,
                  cbind.data.frame(HHsize,sexratio, scratio, stratio, litratio, workratio, workmainratio, workmargratio
                  )))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")



# Postal code-level characteristics


mean.data_hh(with(data,
                  cbind.data.frame( sr.sec,pub, rooms, rooms.puc, headroom,library,profteach, english
                  )))%>%stargazer(.,summary=FALSE,rownames = FALSE,type="text")





#create variables for the age individuals completed between winning the lottery and being surveyed
#turned 6
ind$turn=ifelse((ind$member1age>6) &(ind$member1age-6<=ind$elapse),1,0)
#turned16
ind$turn2=ifelse((ind$member1age>16) &(ind$member1age-16<=ind$elapse),1,0)
#turned18
ind$turn3=ifelse((ind$member1age>18) &(ind$member1age-18<=ind$elapse),1,0)
#turned21
ind$turn4=ifelse((ind$member1age>21) &(ind$member1age-21<=ind$elapse),1,0)
#older
ind$older=ifelse((ind$member1age-21>ind$elapse),1,0)

#Table 4: Regressions of individual completion of various years of education on the treatment indicator-----

#run individual regressions and cluster errors. results printed in the function at the end
a=lm(educ1~t+turn+turn2+turn3+turn4+older+t*centeredB,data=ind)
vcova=cluster.vcov(a,cluster=ind$X)



b=lm(I(educ1>0)~t+t*centeredB,data=ind)
vcovb=cluster.vcov(b,cluster=ind$X)

c=lm(I(educ1>0)~t+t*turn+t*centeredB,data=ind)
vcovc=cluster.vcov(c,cluster=ind$X)
c.cof=coeftest(c,vcov=vcovc)


d=lm(I(educ1>10)~t+t*centeredB,data=ind)
vcovd=cluster.vcov(d,cluster=ind$X)



e=lm(I(educ1>10)~t+t*turn2+t*centeredB,data=ind)
vcove=cluster.vcov(e,cluster=ind$X)


f=lm(I(educ1>12)~t+t*centeredB,data=ind)
vcovf=cluster.vcov(f,cluster=ind$X)


g=lm(I(educ1>12)~t+t*turn3+t*centeredB,data=ind)
vcovg=cluster.vcov(g,cluster=ind$X)

h=lm(I(educ1==15)~t+t*centeredB,data=ind)
vcovh=cluster.vcov(h,cluster=ind$X)


i=lm(I(educ1==15)~t+t*turn4+t*centeredB,data=ind)
vcovi=cluster.vcov(i,cluster=ind$X)

j=lm(educ1~t+t*older+t*centeredB,data=ind)
vcovj=cluster.vcov(j,cluster=ind$X)



stargazer(a,j, b,c,d,e,f,g,h,i, se=list(sqrt(diag(vcova)),sqrt(diag(vcovj)),sqrt(diag(vcovb)),sqrt(diag(vcovc)),sqrt(diag(vcovd)),
                                    sqrt(diag(vcove)),sqrt(diag(vcovf)),sqrt(diag(vcovg)),sqrt(diag(vcovh)),sqrt(diag(vcovi))),
        #  covariate.labels = c("T", "Turned6", "Turned16", "Turned18", "Turned21", "TXTurned6", 
                        #       "TXTurned16", "TXTurned18","TXTurned21"),
        #  dep.var.labels   = c("Years of education",
                           #    "I(>0 years)", "I(>10 years)", "I(>12 years)","I(=15 years)") ,
          
          type='text',omit.stat = "f",no.space = TRUE, omit='block')




#Table 5: Regressions of individual employment on the treatment indicator-----

#run individual regressions and cluster errors. results printed in the function at the end

z=lm(work1~t+turn+turn2+turn3+turn4+older+t*centeredB,data=ind)
vcovz=cluster.vcov(z,cluster=ind$X)


a=lm(work1~t+t*centeredB,data=ind)
vcova=cluster.vcov(a,cluster=ind$X)



c=lm(work1~t+t*turn+t*centeredB,data=ind)
vcovc=cluster.vcov(c,cluster=ind$X)





e=lm(work1~t+t*turn2+t*centeredB,data=ind)
vcove=cluster.vcov(e,cluster=ind$X)




g=lm(work1~t+t*turn3+t*centeredB,data=ind)
vcovg=cluster.vcov(g,cluster=ind$X)


i=lm(work1~t+t*turn4+t*centeredB,data=ind)
vcovi=cluster.vcov(i,cluster=ind$X)

j=lm(work1~t*older+t*centeredB,data=ind)
vcovj=cluster.vcov(j,cluster=ind$X)





z.=lm(workfull1~t+turn+turn2+turn3+turn4+older+t*centeredB,data=ind)
vcovz.=cluster.vcov(z.,cluster=ind$X)



c.=lm(workfull1~t+t*turn+t*centeredB,data=ind)
vcovc.=cluster.vcov(c.,cluster=ind$X)


e.=lm(workfull1~t+t*turn2+t*centeredB,data=ind)
vcove.=cluster.vcov(e.,cluster=ind$X)


g.=lm(workfull1~t+t*turn3+t*centeredB,data=ind)
vcovg.=cluster.vcov(g.,cluster=ind$X)


i.=lm(workfull1~t+t*turn4+t*centeredB,data=ind)
vcovi.=cluster.vcov(i.,cluster=ind$X)

j.=lm(workfull1~t*older+t*centeredB,data=ind)
vcovj.=cluster.vcov(j.,cluster=ind$X)


e_=lm(workpart1~t+t*turn2+t*centeredB,data=ind)
vcove_=cluster.vcov(e_,cluster=ind$X)


g_=lm(workpart1~t+t*turn3+t*centeredB,data=ind)
vcovg_=cluster.vcov(g_,cluster=ind$X)


i_=lm(workpart1~t+t*turn4+t*centeredB,data=ind)
vcovi_=cluster.vcov(i_,cluster=ind$X)

j_=lm(workpart1~t*older+t*centeredB,data=ind)
vcovj_=cluster.vcov(j_,cluster=ind$X)



stargazer(z,c,e,g,i,j,e.,g.,i.,j.,e_,g_,i_,j_, se=list(sqrt(diag(vcovz)),sqrt(diag(vcovc)),
                               sqrt(diag(vcove)),sqrt(diag(vcovg)),sqrt(diag(vcovi)),sqrt(diag(vcovj)),
                               
                               sqrt(diag(vcove.)),sqrt(diag(vcovg.)),sqrt(diag(vcovi.)),sqrt(diag(vcovj.)),
                               sqrt(diag(vcove_)),sqrt(diag(vcovg_)),sqrt(diag(vcovi_)),sqrt(diag(vcovj_))
                            ),
          covariate.labels = c("T", "Turned6", "Turned16", "Turned18", "Turned21", "Older", "TXTurned6", 
                               "TXTurned16", "TXTurned18","TXTurned21", "TXOlder"),
          dep.var.labels   = c("Employed",
                               "Employed (full-time)",
                               "Employed (part-time)") ,
          type='text',omit.stat = "f",no.space = TRUE, omit='block')











